
* Load the 1850 extract into memory, discarding whatever was there before
use "1850_full.dta", clear

* Exclude people living in group quarters (gq codes 3 and 4).
* Records with any other gq value, including missing, are retained.
tab gq
keep if !inlist(gq, 3, 4)

* Male indicator: 1 if sex==1, 0 otherwise, missing when sex is missing
gen male1850 = (sex==1) if sex~=.

* White indicator: 1 if race==1, 0 otherwise, missing when race is missing
gen white1850 = (race==1) if race~=.

* Parental real property: row sum of the two parent-level variables.
* The -missing- option leaves the sum missing when both components are missing
* (instead of egen's default of 0).
egen realprop_parents = rowtotal(realprop_pop realprop_mom), missing

* Flag records for which the parental real property sum is available
gen nonmissingrealpropparents = !(realprop_parents==.)

* Retain only the identifiers, constructed measures and geography used below
keep histid age male1850 white1850 ///
	realprop_parents nonmissingrealpropparents ///
	stateicp countyicp

* Rename the variables to indicate the year they are from.
* -foreach- is used instead of the out-of-date -for- command, which is no longer
* documented and is kept in Stata only for backward compatibility.
foreach v of varlist histid realprop_parents age stateicp countyicp {
	rename `v' `v'1850
}

* Now merge with Eckert crosswalk.
* The ICPSR state and county codes are renamed to the key names used in the join.
rename stateicp1850 icpsrst
rename countyicp1850 icpsrcty

* Join on the ICPSR state-county pair (icpsrst icpsrcty).
* joinby forms every pairwise combination within a key, so a person whose 1850
* county maps to several crosswalk rows appears once per row.
* unmatched(both) keeps unmatched observations from both sides and creates _merge.
joinby icpsrst icpsrcty using "EGLP_1850.dta", unmatched(both)

* Largest crosswalk weight within each 1990 state-county cell
bysort nhgisst_1990 nhgiscty_1990: egen maxweight = max(weight)

* Keep the join indicator under a descriptive name
* (1 = census record only, 2 = crosswalk row only, 3 = matched)
rename _merge merge1850crosswalk

* Combine the crosswalk's 1990 state and county codes into one numeric county code.
* NOTE(review): the *100 and /10 scaling presumably strips a trailing zero carried
* by the NHGIS codes to give a 5-digit state+county FIPS -- confirm against the crosswalk.
gen cty_fips = nhgisst_1990*100 + (nhgiscty_1990/10)

* Keep counties that VA independent cities were a part of
* https://www.bea.gov/system/files/methodologies/LAPI-Methodology.pdf
* The flag is built BEFORE the codes are overwritten below, so it marks the four
* original county codes.
gen VAcounties = inlist(cty_fips, 51177, 51165, 51121, 51053)

* For those four VA counties, overwrite the county code with the code of the
* associated independent city.
* This will keep those records when merging to the Normal/Asylum counties.
recode cty_fips (51053 = 51730) (51121 = 51750) (51177 = 51630) (51165 = 51660)

* Drop crosswalk-only rows (no census record attached), except the VA counties above
drop if merge1850crosswalk==2 & VAcounties~=1

* See censustreemerge18501860.do for a description of why some IPUMS data do not
* merge to the crosswalk.  Here, drop WV people because we want people in what are
* today the present-day states.  But keep the one LA county that does not merge to
* the crosswalk: it does not matter for the main analysis since it is not a normal or
* asylum county, but it does matter for the national distribution.  Similarly, keep
* the Texas county for the distribution before dropping the small-sample-size states.

* Census-only records by ICPSR state: first all of them, then the ones about to be dropped
tab icpsrst if merge1850crosswalk==1
tab icpsrst if merge1850crosswalk==1 & !inlist(icpsrst, 45, 49)

* Drop census-only records unless the ICPSR state code is 45 or 49
drop if merge1850crosswalk==1 & !inlist(icpsrst, 45, 49)

* Tag the county code with the census year
rename cty_fips cty_fips1850

* Derive the state code from the county code: make a string copy, then take
* everything except the last three characters (1 leading character for a
* 4-character code, 2 for a 5-character code).  Any other length, including the
* "." produced for a missing county code, leaves the state code missing.
tostring cty_fips1850, gen(cty_fipstr)
gen statefip1850 = substr(cty_fipstr, 1, length(cty_fipstr) - 3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1850, replace

* Which ICPSR states still lack a state code (records with no crosswalk match)?
tab icpsrst if statefip1850==.

* For the LA county (icpsrst 45), set statefip to 22
* For the TX county (icpsrst 49), set statefip to 48
replace statefip1850 = cond(icpsrst==45, 22, 48) if statefip1850==. & inlist(icpsrst, 45, 49)

* Keep states that did not have schools by 1860: AL, AR, AZ, CA, CO, FL, GA, IA, ID, IN, KS,
* KY, LA, MD, ME, MN, MO, MS, MT, NC, ND, NE, NH, NM, OH, OK, OR, RI, SC, SD, TN, TX, UT, VA,
* VT, WA, WI, WV, and drop DE which did not have a normal school.
* Records with a missing statefip1850 are not in the list and are therefore dropped.

keep if inlist(statefip1850, 1, 4, 5, 6, 8, 12, 13, 16, 18, 19, 20, 21, 22, 23, 24, ///
	27, 28, 29, 30, 31, 33, 35, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 49, 50, 51, ///
	53, 54, 55)

* Drop states that were not yet states on Census Day 1850.
* The full variable name statefip1850 is used throughout.  The earlier code wrote
* "statefip", which only resolved through variable-name abbreviation; that breaks
* under -set varabbrev off- or if another statefip* variable is ever present.
* NOTE(review): assumes the crosswalk does not supply a variable literally named
* statefip -- confirm.

drop if inlist(statefip1850, 4, 6, 8, 16, 27, 30, 31, 32, 35, 38, 40, 41, 46, 49, ///
	53, 54, 56)

* Drop Georgia, RI, SC because no asylum counties

drop if inlist(statefip1850, 13, 44, 45)

* Remaining sample by state
tab statefip1850

* The crosswalk join can repeat a person across several rows; tag exactly one row
* per histid1850 so each person is counted once in the tabulations below.
egen taghistid = tag(histid1850)

* Distribution of parental real property for white 16-18 year olds, one row per person.
* Full variable names (realprop_parents1850, age1850, statefip1850) are spelled out:
* these variables were renamed with the 1850 suffix earlier in the file, and the
* unsuffixed names only worked through variable-name abbreviation.
tab realprop_parents1850 if inrange(age1850, 16, 18) & white1850==1 & taghistid==1

* Dropping states with small samples that aren't in the main Table 1 regression
tab realprop_parents1850 if inrange(age1850, 16, 18) & white1850==1 & taghistid==1 & !inlist(statefip1850, 12, 19, 48, 55)
